################################################################################ 
################################################################################ 
#################### Meta-analysis on the Contact Hypothesis ###################
################################################################################ 
################################################################################ 

# This script runs the heterogeneity analysis using a Lasso of characteristics
# for the project 
# Meta-analysis on the Contact Hypothesis
# by Gwen-Jiro Clochard


################################################################################ 
################################## Import data #################################
################################################################################ 


all_measures <- 
  read_excel(paste(pathdata, "Clean//All_measures.xlsx", sep="//"))

indiv <- 
  read_excel(paste(pathdata, "Clean//Individual_papers.xlsx", sep="//"))


################################################################################ 
############################ Lasso for all variables ###########################
################################################################################ 


##### Lasso #####


### Making sure that variables are factors

all_measures <- 
  all_measures %>%
  mutate(
    Population = factor(Population), 
    zone = factor(zone),
    Prejudice = factor(Prejudice),
    Contact_type = factor(Contact_type),
    Nb_encounters = factor(Nb_encounters),
    type_outcome = factor(type_outcome),
    allgroup = factor(allgroup)
  )


### Model matrix

X <- model.matrix(
  effect ~ Year + Published + Mean_age + 
    Population + zone + Prejudice + Contact_type +
    Nb_encounters + Duration + type_outcome + 
    allgroup + Time_since_contact + 
    mean_common_2 + mean_equal_2 + mean_positive_2 +
    mean_support_2 + mean_friendship_2 +
    common_gpt + equal_gpt + positive_gpt +
    support_gpt + friendship_gpt +
    common_GJ + equal_GJ + positive_GJ +
    support_GJ + friendship_GJ,
  data = all_measures
)[, -1]   # removes intercept

y <- all_measures$effect

w <- all_measures$N

cv_model <- cv.glmnet(
  x = X,
  y = y,
  alpha = 1,        # Lasso
  weights = w,
  standardize = TRUE
)

best_lambda <- cv_model$lambda.min

lasso_model <- glmnet(
  x = X,
  y = y,
  alpha = 1,
  lambda = best_lambda,
  weights = w,
  standardize = TRUE
)


coef_mat <- as.matrix(coef(lasso_model))
nonzero <- coef_mat[coef_mat[, 1] != 0, , drop = FALSE]
nonzero


##### Post Lasso Meta Regression #####

selected_vars <- rownames(coef_mat)[coef_mat[, 1] != 0]
selected_vars <- setdiff(selected_vars, "(Intercept)")
selected_vars


### Create necessary variables

all_measures <-
  all_measures %>%
  mutate(
    contact_5 = as.numeric(Contact_type == "5. Neighbors")
  )


### Post-Lasso OLS

lm1 <- 
  lm(
    data = all_measures, 
    formula = effect ~ Year + contact_5 + Time_since_contact 
    + mean_common_2 + mean_friendship_2, 
    weights = weight
  )
summary(lm1)

# Coefficients
coef_ols <- coef(lm1)

# Clustered VCOV
cluster_se <- vcovCL(lm1, cluster = ~Paper_id)

# Robust SE
se_ols <- sqrt(diag(cluster_se))

# p-values (normal approximation)
t_ols  <- coef_ols / se_ols
p_ols  <- 2 * (1 - pnorm(abs(t_ols)))

# Number of observations
n_ols <- nobs(lm1)

# Number of clusters
G_ols <- length(unique(all_measures$Paper_id))

# Significance stars
stars_ols <- ifelse(p_ols < 0.005, "***",
                    ifelse(p_ols < 0.01, "**",
                           ifelse(p_ols < 0.05,  "*", "")))


# OLS output
ols_out <- data.frame(
  var = names(coef_ols),
  b_ols = coef_ols,
  se_ols = se_ols,
  p_ols = p_ols,
  stringsAsFactors = FALSE
)
ols_out$stars_ols <- stars_ols

r2_ols <- summary(lm1)$r.squared


### Linear Meta-regression

dat <- all_measures %>%
  mutate(
    e  = effect,
    se = se_effect,             
    v  = se_effect^2,
    paper_id = as.integer(factor(Paper_id)), 
    obs_id  = row_number()
  ) %>%
  filter(!is.na(e) & !is.na(se) & !is.na(paper_id)) %>%
  as.data.frame()

## Fitting model
fit_time <- rma.mv(
  yi     = e,
  V      = v,
  mods   = ~ Year + contact_5 + Time_since_contact 
  + mean_common_2 + mean_friendship_2,
  random = list(~ 1 | paper_id/obs_id),
  data   = dat,
  method = "REML"
)

## Cluster-robust inference
rob_time <- coef_test(
  fit_time,
  vcov    = "CR2",
  cluster = dat$paper_id
)

meta_out <- data.frame(
  var     = rownames(rob_time),
  b_meta  = rob_time$beta,
  se_meta = rob_time$SE,
  p_meta  = rob_time$p_Satt,   # CR2 p-values
  stringsAsFactors = FALSE
)
# Number of observations
n_meta <- nrow(dat)

# Number of clusters
G_meta <- length(unique(dat$paper_id))

# Significance stars (CR2 p-values)
stars_meta <- ifelse(meta_out$p_meta < 0.005, "***",
                     ifelse(meta_out$p_meta < 0.01, "**",
                            ifelse(meta_out$p_meta < 0.05,  "*", "")))

meta_out$stars_meta <- stars_meta


meta_out$var[meta_out$var == "intrcpt"] <- "(Intercept)"


### Merge both models

combined <- merge(ols_out, meta_out, by = "var", all = TRUE)

combined$var[combined$var == "(Intercept)"] <- "Constant"


table_out <- combined %>%
  mutate(
    OLS  = sprintf("%.3f%s\n(%.3f)", b_ols, stars_ols, se_ols),
    META = sprintf("%.3f%s\n(%.3f)", b_meta, stars_meta, se_meta)
  ) %>%
  select(var, OLS, META)

stats_rows <- data.frame(
  var  = c("Num. clusters", "Num. obs"),
  OLS  = c(G_ols, n_ols),
  META = c(G_meta, n_meta)
)

table_final <- rbind(table_out, stats_rows)

table_final$var <- 
  c("Constant", 
    "Intervention type: Neighbors",
    "Common goal experiment", 
    "Friendship potential experiment",
    "Time since contact", 
    "Year", 
    "Num. clusters", 
    "Num. obs"
  )


### Export table

print(
  xtable(table_final),
  file = file.path(pathtables, "combined_regressions_post_Lasso.tex"),
  include.rownames = FALSE,
  sanitize.text.function = identity,
  comment = FALSE
)


################################################################################ 
####################### Publication bias for common goal #######################
################################################################################ 


### Estimation 

lm1 <- 
  lm(
    effect ~ contact_5 + Time_since_contact 
    + mean_common_2 + mean_friendship_2 + Year + se_effect,
    data = all_measures,
    weights = N
  )
summary(lm1)

cluster_se <- vcovCL(lm1, cluster = ~Paper_id) # Compute clustered standard errors (by team_id)
robust_se_1 <- sqrt(diag(cluster_se)) # Extract the robust standard errors
t_values <- coef(lm1) / robust_se_1 # Calculate t-values
p_values_1 <- 2 * (1 - pnorm(abs(t_values))) # Calculate p-values

### Producing table

collabels <- c("Effect size")

covlabels <- c("Intervention type: Neighbors",
               "Time since contact", 
               "Common goal experiment", 
               "Friendship potential experiment",
               "Year", 
               "SE effect",
               "Constant"
               )

notes = "The dependent variable is the standardized effect size. Robust standard errors clustered at the paper level."

title = "Publication bias for the negative link between common goal and effect size"

label = "tab: publication bias common goal"

stargazer(lm1, 
          type='latex', 
          table.placement = "htbp",
          title = title,
          se = list(robust_se_1),  # Pass the clustered SEs here
          p = list(p_values_1),  # Pass manually computed p-values
          style = "all2",
          label = label,
          column.labels = collabels,
          covariate.labels = covlabels,
          # keep = variables_to_keep,
          keep.stat = c("rsq", "n"),
          star.cutoffs = c(0.05, 0.01, 0.005),
          summary = FALSE,
          dep.var.caption = NULL,
          mean.sd = TRUE,
          digits = 3,
          notes = notes,
          out = paste(pathtables, "Publication_bias_commongoal.tex", sep="//"),
          notes.append = FALSE,
          notes.align = "l",
          header = FALSE
)



################################################################################ 
################################ Clearing memory ###############################
################################################################################ 

rm(
  all_measures, indiv, 
  cluster_se, coef_mat, combined, cv_model, dat, fit_time, 
  lasso_model, lm1, meta_out, nonzero, ols_out, rob_time, 
  stats_rows, table_final, table_out, X, best_lambda, coef_ols, 
  G_meta, G_ols, n_meta, n_ols, p_ols, se_ols, selected_vars, 
  stars_meta, stars_ols, t_ols, w, y, 
  collabels, covlabels, label, notes, title,
  p_values_1, robust_se_1, r2_ols, t_values
)


################################################################################ 
#################################### THE END ###################################
################################################################################ 